*-------------------------------------------------------------------------------
*       POLITICIANS AND TAX POLICY: The Role of Preferences and Beliefs
*-------------------------------------------------------------------------------
*-------------------------------------------------------------------------------
* Description: this .do file merges all datasets, creates the variables, and
* produces all the tables and figures included in the paper.
*-------------------------------------------------------------------------------

* Session setup: clear memory, disable -more- pauses and set the graph font.
clear all
* NOTE(review): -global clear- only (re)defines an empty global macro that is
* literally named "clear"; it does not drop other globals. Confirm whether
* something like -macro drop- was intended before changing it.
global clear
set more off
graph set window fontface "Times New Roman"

*Setting directories
********************************************************************************
* The project root depends on the operating system and on the current user name.
if `"`c(os)'"' == "MacOSX" {
	global path   `"/users/`c(username)'/Dropbox/Research info & policy/Taxpolicy_followup/Data and CodeNTJ_forpublic"'
}
else if `"`c(os)'"' == "Windows" {
	global path   `"C:/Users/`c(username)'/Dropbox/Research info & policy/Taxpolicy_followup/Data and CodeNTJ_forpublic"'
}

* Fail early with a clear message when no project root is available (e.g. on
* Unix, where neither branch above applies and the caller did not define
* global path beforehand). Without this check the script would go on with an
* empty root and only stop at the first -use- with a "file not found" error.
if `"${path}"' == "" {
	display as error "global path is not set for c(os) = `c(os)'; define it before running this do-file"
	exit 601
}

global rawdata 		"${path}/raw data" 
global analysis 	"${path}/data analysis" 
********************************************************************************

********************************************************************************
***************************** MAIN TABLES **************************************
********************************************************************************

*----------------------- REVENUE AND EXPENDITURES -----------------------------* 

// Revenues
// Fills rows 1-12 of matrix tab with N, mean and sd of every revenue
// category: share variables in columns 1-3, per-capita variables in columns 4-6.
use "${rawdata}/clean data/revenues.dta", clear

// long -> wide: one share* and one valorpc_usd* variable per value of revenues
reshape wide share valorpc_usd, i(cod_ibge) j(revenues) string

// 20 x 6 results matrix; rows 13-20 stay missing in this step
matrix tab = J(20, 6, .)

* Share of total
local share sharetotal sharetax_total sharetax_iss sharetax_iptu sharetax_itbi sharetax_other sharetransfer_total sharetrans_central sharetrans_central_fpm sharetrans_state sharetrans_other sharerev_other

local row = 1
foreach v of local share {
	// print the variable label so each row can be identified in the log
	local vlabel : variable label `v'
	display "`vlabel'"

	summarize `v'
	matrix tab[`row', 1] = r(N)
	matrix tab[`row', 2] = r(mean)
	matrix tab[`row', 3] = r(sd)

	local ++row
}

* Revenue per capita
local pc valorpc_usdtotal valorpc_usdtax_total valorpc_usdtax_iss valorpc_usdtax_iptu valorpc_usdtax_itbi valorpc_usdtax_other valorpc_usdtransfer_total valorpc_usdtrans_central valorpc_usdtrans_central_fpm valorpc_usdtrans_state valorpc_usdtrans_other valorpc_usdrev_other

// restart at row 1 so per-capita figures sit next to the matching share rows
local row = 1
foreach v of local pc {
	local vlabel : variable label `v'
	display "`vlabel'"

	summarize `v'
	matrix tab[`row', 4] = r(N)
	matrix tab[`row', 5] = r(mean)
	matrix tab[`row', 6] = r(sd)

	local ++row
}

// Expenses
// Fills rows 13-20 of the existing 20 x 6 matrix tab with N, mean and sd of
// every expenditure category: shares in columns 1-3, per-capita in columns 4-6.
use "${rawdata}/clean data/expenditures.dta", clear

// long -> wide: one share* and one valorpc_usd* variable per value of expenses
reshape wide share valorpc_usd, i(cod_ibge) j(expenses) string

* Share of total
local share sharetotal sharehealth shareeducation sharesanitation sharetransport shareurban shareadministration shareother

// expenditure rows start right after the 12 revenue rows
local row = 13
foreach v of local share {
	// print the variable label so each row can be identified in the log
	local vlabel : variable label `v'
	display "`vlabel'"

	summarize `v'
	matrix tab[`row', 1] = r(N)
	matrix tab[`row', 2] = r(mean)
	matrix tab[`row', 3] = r(sd)

	local ++row
}

* Expenses per capita
local pc valorpc_usdtotal valorpc_usdhealth valorpc_usdeducation valorpc_usdsanitation valorpc_usdtransport valorpc_usdurban valorpc_usdadministration valorpc_usdother

local row = 13
foreach v of local pc {
	local vlabel : variable label `v'
	display "`vlabel'"

	summarize `v'
	matrix tab[`row', 4] = r(N)
	matrix tab[`row', 5] = r(mean)
	matrix tab[`row', 6] = r(sd)

	local ++row
}

// show the combined revenue / expenditure table
matrix list tab

*----------------------------- SUMMARY STATISTICS -----------------------------* 

use "${analysis}/secondary/samples.dta", clear

// Attach covariates by cod_ibge, keeping master-only and matched observations.
// NOTE(review): the Stata manual discourages -merge m:m-; if cod_ibge uniquely
// identifies rows in the using files, m:1 or 1:1 would be safer -- confirm.
merge m:m cod_ibge using "${rawdata}/clean data/population.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/policyexperiment_mayor_char.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/vote_margin.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/census.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/tax_revenues.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/munic_survey.dta", keep(master match) nogenerate

// express the listed variables in percentage points (multiply by 100)
foreach v in male_mayor college_mayor mandate2_mayor vtmargin_mayor party_left_mayor gini_index big_south share_taxes_2010_2015 iptu_2015 cad_issqn_2015 {
	replace `v' = `v'*100
}

// population in thousands
replace popul_2016 = popul_2016/1000

// Stack a copy of the restricted sample below the full data so that the two
// groups can be compared with -ttest, by()-.
preserve
drop sample_all
keep if sample_notinfluenced == 1
tempfile sample_notinfluenced
save `sample_notinfluenced'
restore

preserve
drop sample_notinfluenced
append using `sample_notinfluenced'

// original rows flagged sample_all == 1 become group 0; the appended copy
// keeps sample_notinfluenced == 1
replace sample_notinfluenced = 0 if sample_all == 1

global vars popul_2016 gini_index poverty share_taxes_2010_2015 male_mayor age_mayor college_mayor mandate2_mayor iptu_2015 cad_issqn_2015

// one row per variable in $vars, eight statistics per row
matrix tab = J(10, 8, .)
local row = 1

foreach v of global vars {

	// print the variable label so each row can be identified in the log
	local vlabel : variable label `v'
	display "`vlabel'"

	// Columns 1-3: restricted sample (sample_notinfluenced == 1)
	summarize `v' if sample_notinfluenced == 1
	matrix tab[`row', 1] = r(N)
	matrix tab[`row', 2] = r(mean)
	matrix tab[`row', 3] = r(sd)

	// Columns 4-6: full sample (sample_all == 1)
	summarize `v' if sample_all == 1
	matrix tab[`row', 4] = r(N)
	matrix tab[`row', 5] = r(mean)
	matrix tab[`row', 6] = r(sd)

	// Columns 7-8: mean of group 1 minus mean of group 0, and the t-test p-value
	ttest `v', by(sample_notinfluenced)
	local gap = r(mu_2) - r(mu_1)
	matrix tab[`row', 7] = `gap'
	matrix tab[`row', 8] = r(p)

	local ++row
}
// back to the merged, unstacked data
restore
matrix list tab

*------------------------ PREDICTORS OF TAX COLLECTION ------------------------*
/*
merge m:m cod_ibge using "${rawdata}/cng survey/cng_survey_noid.dta", keep(1 3) nogen

// tax priority standardized variable with respect to other priority responses
unab vbles: q21_1 q21_2 q21_4 q21_5 q21_6 q21_7
egen priority_m = rowmean(`vbles')
gen q21_3_demean = q21_3-priority_m
egen z_q21_3 = std(q21_3_demean)
drop priority_m q21_3_demean

preserve
keep if sample_notinfluenced == 1

// control variables that are used in tables
global Municipality popul_2016 gini_index poverty share_taxes_2010_2015
global Mayor male_mayor age_mayor college_mayor q24
global Political mandate2_mayor q22_1 q22_2 q22_3 q22_4 q22_5

// column 1 (Municipality controls)
reg z_q21_3 $Municipality, vce(robust)

matrix pred1 =  r(table)'
mat list pred1
scalar pred1_r2 = e(r2)
scalar pred1_obs = e(N)

// column 2 (+ Mayor controls)
reg z_q21_3 $Municipality $Mayor, vce(robust)

matrix pred2 =  r(table)'
mat list pred2
scalar pred2_r2 = e(r2)
scalar pred2_obs = e(N)

// column 3 (+ Political controls)
reg z_q21_3 $Municipality $Mayor $Political, vce(robust)

matrix pred3 =  r(table)'
mat list pred3
scalar pred3_r2 = e(r2)
scalar pred3_obs = e(N)
restore 

*-------------------- PREDICTORS OF TAX POLICIES BELIEFS ----------------------*

preserve
keep if sample_notinfluenced == 1

global q31 q31_1_1 q31_2_1 q31_3_1 q31_4_1 q31_5_1

foreach var in $q31 {

reg `var' $Municipality $Mayor  $Political, vce(robust)
matrix b`var' =  r(table)'
mat list b`var'
scalar b`var'_r2 = e(r2)
scalar b`var'_obs = e(N)
}
restore 

*------------------- PREDICTORS OF SESSION PARTICIPATION ----------------------*

egen effectiveness_max = rowmax(q31_1_1 q31_2_1 q31_3_1 q31_4_1 q31_5_1)
egen effectiveness_mean = rowmean(q31_1_1 q31_2_1 q31_3_1 q31_4_1 q31_5_1)
egen confidence_max = rowmax(q31_1_2 q31_2_2 q31_3_2 q31_4_2 q31_5_2)
egen confidence_mean = rowmean(q31_1_2 q31_2_2 q31_3_2 q31_4_2 q31_5_2)

keep if treatment_assignment==1
keep if sample_notinfluenced == 1

global sample_mean z_q21_3 q22_1 q22_2 q22_3 q22_4 q22_5 q24 effectiveness_mean confidence_mean
global sample_max z_q21_3 q22_1 q22_2 q22_3 q22_4 q22_5 q24 effectiveness_max confidence_max 

// column 1: mean-based belief measures ($sample_mean)
reg attended $sample_mean poverty college_mayor, vce(robust)
matrix full =  r(table)'
scalar full_r2 = e(r2)
scalar full_obs = e(N)
mat list full 

// column 2: max-based belief measures ($sample_max)
reg attended $sample_max poverty college_mayor, vce(robust)
matrix fullb =  r(table)'
scalar fullb_r2 = e(r2)
scalar fullb_obs = e(N)
mat list fullb 
*/

********************************************************************************
********************************* FIGURES **************************************
********************************************************************************

*----------------------------- HISTOGRAM Q2.1 ---------------------------------*

use "${rawdata}/cng survey/cng_survey_noid.dta", clear

// keep only observations flagged sample_notinfluenced
keep if sample_notinfluenced == 1

// One bar chart per q21 item; bar heights are the percentage of non-missing
// answers that fall in each answer category.
foreach item in q21_1 q21_2 q21_3 q21_4 q21_5 q21_6 q21_7 {

	// every observation gets the weight 100/N, N = number of non-missing answers
	egen aux_`item' = count(`item')
	replace aux_`item' = 1/aux_`item'*100

	// summing the weights within each answer category gives the percentage
	graph bar (sum) aux_`item', over(`item') ///
		blabel(bar, format(%9.1f)) ytitle("") ///
		bar(1, fcolor(edkblue*0.5) color(white) ) ///
		graphregion(color(white))
}
// remove the helper weights once all charts are drawn
drop aux*

*-------------------------- HISTOGRAM Q3.1-3.5 --------------------------------*

// For each item q31_1 ... q31_5: stacked bar chart of the answers in q31_i_1,
// split by the rating in q31_i_2 (values 1-2 vs. values 3-5).
forvalues i = 1/5 {

	// flag respondents who answered both parts and count them
	gen all_response`i' = 1 if q31_`i'_2!=. & q31_`i'_1!=.
	// total() is the documented egen function; sum() is its superseded name
	egen total_response`i' = total(all_response`i') if all_response`i'!=.

	// per-respondent percentage weight, separately for the two rating groups
	gen conf`i'_1 = 1/total_response`i'*100 if (q31_`i'_2 == 1 | q31_`i'_2 == 2) & q31_`i'_1!=.
	gen conf`i'_2 = 1/total_response`i'*100 if (q31_`i'_2 == 3 | q31_`i'_2 == 4 | q31_`i'_2 == 5) & q31_`i'_1!=.

	preserve
	// one row per answer category, holding the percentage in each rating group
	collapse (sum) conf`i'_1 conf`i'_2, by(q31_`i'_1)
	// bar2 = total bar height, used as top of the stacked segment and as label
	gen bar2 = conf`i'_1 + conf`i'_2
	drop if q31_`i'_1 ==. 

	format bar2 %12.1f

	// plot 1: lower segment (ratings 1-2); plot 2: upper segment (ratings 3-5);
	// plot 3: invisible markers that only carry the total as a label
	twoway bar conf`i'_1 q31_`i'_1, barw(0.6) color(edkblue*.95) lwidth(none) || ///
		rbar conf`i'_1 bar2 q31_`i'_1, barw(0.6) color(edkblue*.7) lwidth(none)   || ///
		scatter bar2 q31_`i'_1, ms(none) mla(bar2) mlabcolor(black) mlabpos(12) ///
		ylabel(10(10)40) graphregion(color(white)) ///
		legend(row(1) order(1 "Not Confident" 3 "" 2 "Confident")) ///
		xla(1/8, valuelabel noticks angle(30)) xtitle("")
	restore
}

********************************************************************************
******************************* APPENDIX TABLES ********************************
********************************************************************************

*-------------- CNG SURVEY RESPONSES - POLITICAL CONSTITUENTS -----------------*

// Responses to items q22_1-q22_5 (presumably survey question 2.2 -- confirm).
// This comment line used to start with "///", which is a line-continuation
// comment and therefore spliced the following -unab- command onto the comment
// line; a plain "//" comment avoids relying on that.
unab q22_23: q22_1-q22_5
unab q22_23last: q22_2-q22_5

// For every item build a small table with one row per answer category (cat)
// and the percentage of non-missing answers in that category.
foreach var in `q22_23' {

	preserve
	// N = number of non-missing answers; each observation weighs 100/N
	egen total = count(`var') 
	gen percentage = 1/total*100 

	collapse (sum) percentage (mean) total, by(`var')
	// the row for missing answers (if any) reports N instead of a percentage
	replace percentage = total if `var'==.
	drop total 
	// common key name so the per-item tables can be merged on category
	rename `var' cat 
	rename percentage `var' 
	tempfile dta`var' 
	save `dta`var'', replace
	restore
}

// Combine the per-item tables side by side; keep(3) retains only categories
// that appear in every table. The result is stored in matrix q22_23.
preserve
use `dtaq22_1', clear
foreach var in `q22_23last' {
	merge 1:1 cat using `dta`var'', keep(3) nogen
}

mkmat `q22_23', mat(q22_23)
mat list q22_23
restore

*------------- SUMMARY STATS Treatment AER paper & CNG mayors -----------------* 

use "${analysis}/secondary/samples.dta", clear

// Attach covariates by cod_ibge, keeping master-only and matched observations.
// NOTE(review): the Stata manual discourages -merge m:m-; if cod_ibge uniquely
// identifies rows in the using files, m:1 or 1:1 would be safer -- confirm.
merge m:m cod_ibge using "${rawdata}/clean data/population.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/policyexperiment_mayor_char.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/vote_margin.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/census.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/tax_revenues.dta", keep(master match) nogenerate
merge m:m cod_ibge using "${rawdata}/clean data/munic_survey.dta", keep(master match) nogenerate

// express the listed variables in percentage points (multiply by 100)
foreach v in male_mayor college_mayor mandate2_mayor vtmargin_mayor party_left_mayor gini_index big_south share_taxes_2010_2015 iptu_2015 cad_issqn_2015 {
	replace `v' = `v'*100
}

// population in thousands
replace popul_2016 = popul_2016/1000

// from here on only observations with treatment_assignment == 1 are used
keep if treatment_assignment == 1

// Stack a copy of the sample_session == 1 observations below the
// treatment-assigned data so both groups can be compared with -ttest, by()-.
// NOTE(review): the appended rows are copies of rows already present in the
// first group, so the two groups overlap -- confirm this is intended.
preserve
drop treatment_assignment
keep if sample_session == 1
tempfile sample_session
save `sample_session'
restore

drop sample_session
append using `sample_session'

// original rows (treatment_assignment == 1) become group 0; the appended copy
// keeps sample_session == 1
replace sample_session = 0 if treatment_assignment == 1

global vars popul_2016 gini_index poverty share_taxes_2010_2015 male_mayor age_mayor college_mayor mandate2_mayor iptu_2015 cad_issqn_2015

// one row per variable in $vars, eight statistics per row
matrix tab = J(10, 8, .)
local row = 1

foreach v of global vars {

	// print the variable label so each row can be identified in the log
	local vlabel : variable label `v'
	display "`vlabel'"

	// Columns 1-3: treatment-assigned observations (treatment_assignment == 1)
	summarize `v' if treatment_assignment == 1
	matrix tab[`row', 1] = r(N)
	matrix tab[`row', 2] = r(mean)
	matrix tab[`row', 3] = r(sd)

	// Columns 4-6: observations with sample_session == 1
	summarize `v' if sample_session == 1
	matrix tab[`row', 4] = r(N)
	matrix tab[`row', 5] = r(mean)
	matrix tab[`row', 6] = r(sd)

	// Columns 7-8: mean of group 1 (session) minus mean of group 0, and p-value
	ttest `v', by(sample_session)
	local gap = r(mu_2) - r(mu_1)
	matrix tab[`row', 7] = `gap'
	matrix tab[`row', 8] = r(p)

	local ++row
}

matrix list tab

